; DOS .COM program: loaded at offset 100h of its segment, PSP in 00h..0FFh.
; Start-up state this code relies on (not checked anywhere):
org 100h        ; assume al=0 bx=0 sp=di=-2 si=0100h bp=09??h; last 16 bytes of PSP = 0
; Two 3-float vectors are overlaid on the end of the PSP and the first bytes
; of the program instead of being emitted as data:
;   PN = 0F0h (plane normal), RO = 0F8h (ray origin).
; PN's third float (0F8h) is RO's first float, and RO's third float (100h)
; is the 4 bytes emitted at PD below ("fallthrough").
; PN's middle float only becomes -1.0 after the store at label I; the other
; zero components rely on the "last 16 bytes of PSP = 0" assumption.
PN equ 104h-4*5 ; dd 0.0, -1.0, 0.0  ; fallthrough
RO equ 104h-4*3 ; dd 0.0, 0.0, -48.25
; PD: these 4 bytes are used three ways:
;   - word  [100h] = -16, read as an integer by "fisubr word[si]"
;   - dword [100h] = 0C240FFF0h, read as a float (about -48.25, RO's third component)
;   - executed as the program's first instruction (see the disassembly below);
;     with bx=0 and si=100h it increments the word at 0C2h inside the PSP.
PD:  dw -16     ; F0FF40C2: lock inc word [bx+si-0x3e]
     db 0x40,0xC2
; ---------------------------------------------------------------------------
; I: one-time setup -- patch the plane normal, enter mode 13h, build palette.
;
; Must stay directly after PD: "fild dword[si+2]" in the pixel loop reads the
; dword at 102h, whose upper two bytes are the first two bytes of this
; instruction.
;
; Writes 0BF80h to the word at PN+6 (0F6h), i.e. the high half of PN's second
; float, making it -1.0 provided the low half is 0 (PSP assumption above).
; The "+ si-100h" term only selects the shorter [si+disp8] encoding; it
; contributes 0 because si=100h.
; ---------------------------------------------------------------------------
I:mov word[PN+4+2 + si-100h],0xBF80

; ES is not loaded here; the frame loop at M sets it to 0xA000-160/16 = 9FF6h
; so that X=0 lands in the screen centre (other valid segments are
; 9FF6-N*320/16: 9FE2 9FCE 9FBA 9FA6). Visible pixels are A0000..AFA00.

; Palette: 4 bits orange * 4 bits blue. Uses index 0 (black) from default palette.
;
; The single "int 10h" at P serves two purposes:
;   first pass : ax=0013h (ah=0 assumed from program start) -> set mode 13h
;   later      : ax=1010h -> set DAC register bx to R=dh G=ch B=cl
; A separate mode-set call before this loop was redundant (it set the same
; mode twice) and has been removed.
  mov  al,13h
P int  10h      ; set video mode | set palette index: bx=i dh=R ch=G cl=B
  inc  bx       ; next palette index (starts at 1; index 0 is left untouched)
  mov  al,bl
  aam  16       ; ax = ....rrrr....bbbb  (ah = bl/16, al = bl%16)
  imul dx,ax,4  ; dh = 4*rrrr, dl = 4*bbbb  (each 0..60)
  mov  ax,1010h ; BIOS function for the next pass: set one DAC register
  mov  cx,dx    ; ch = R, cl = B
  add  ch,cl
  shr  ch,1     ; ch=G = (R+B)/2
  jnz  P        ; G==0 only when bl wrapped to 0: dx=cx=0 bx=100h ax=1010h
  
; ---------------------------------------------------------------------------
; M: frame loop.  X: per-pixel loop.  S: "no hit" colour entry point.
;
; Register roles inside the loop:
;   di = byte offset being written through es (stosb advances it)
;   cx = T, frame counter, incremented once per frame
;   si = 100h, di = 104h while the vector helpers run, so that
;        [r+si], [r+di], [r+si+8] address three consecutive floats
;   bp = base of a scratch 3-float vector V at [bp+si] (bp as at program start)
; For every pixel a ray direction is built from the pixel position, the ray is
; intersected with the plane (PN, PD) from origin RO, and the hit point's x and
; z (plus T) select one of two colours; rays with t<0 get colour 0F0h.
; ---------------------------------------------------------------------------
M:xor di,di
  ; es = 9FF6h, i.e. 160 bytes below A000:0000; bh (9Fh) doubles as the Y bias below.
  mov bx,0xA000-160/16
  mov es,bx
X:
  ; dx:ax = di * 0CCCDh: fixed-point split of the offset into row (dh) and column (dl:ax).
  mov ax,0xCCCD ; dx:ax = 00800020 @ 0 0 (di=160) .. C87F6553 @ 319 199 (di=64159)
  mul di
  push dx       ; unbiased dx; its low byte becomes the top byte of X, restored by "pop dx" below
  add dh,bh     ; +80h if you want dy=0 below center, +9B is mid, +B7h is dy=0 above center
                ; make X and Y next to each other on the stack
  ; Stack layout after the pusha below, given sp=-2 at program start:
  ;   [-4]  dx (unbiased)   [-6]  ax   [-8]  cx (T)   [-10] dx (biased)
  ;   [-12] bx (9FF6h)      [-14] sp   [-16] bp       [-18] si   [-20] di (pixel offset)
  fninit        ; reset the FPU: empty register stack for this pixel
  pusha
  xor  bx,bx    ; bx=0 so [bx-N] addresses the saved registers at the top of the segment
  ; The dword at si+2 (102h) spans the last two bytes of PD and the first two
  ; bytes of the instruction at I; it is used as a constant.
  fild dword[si+2]   ; Z=big_positive_number
  ; dword at [-12]: low word = saved bx, high word = saved biased dx
  fild dword[bx-12]  ; Y
  ; dword at [-7]: bytes ch, al, ah, dl(unbiased) from low to high
  fild dword[bx-7]   ; X   |rD.x rD.y rD.z

  mov di,104h       ; di = si+4, so [bp+di] is the middle float of a vector at [bp+si]
  pusha             ; save bx=0, bp, di=104h across the vector maths
  mov bx,bp         ; both operands of the dot product are V -> squared length
  call STORE_DOT    ;V=rD (unnormalized);  |rD*rD
  fsqrt
  fld1
  fdivrp st1        ;|rsqrt(rD*rD)
  call LOAD_SCALE   ;|rd.x rd.y rd.z

  mov bx,PN-0x100   ; [bx+si] = PN
  call STORE_DOT    ;V=rd   |D=pn*rd
  mov bp,RO-0x100   ; [bp+si] = RO
  call DOT          ;|pn*ro D
  popa              ; back to bx=0, bp=V base, di=104h

  ; word[si] is PD (-16) read as a 16-bit integer: st0 = PD - st0
  fisubr word[si];PD   ;|N=pd-pn*ro D
  fdivrp            ;|t      (st1 = st0/st1 = N/D, then pop)
  ; Copy the FPU compare result into the CPU flags; CF (=C0) is set when st0 < 0.
  ; Nothing between sahf and "jb S" modifies the CPU flags.
  ftst              ;set carry flag if t<0
  fnstsw ax
  sahf
  call LOAD_SCALE   ;|t*rd.x t*rd.y t*rd.z
  
                    ; h = {ro + rd*t}; we need only x and z
  ; Results are written into the first pusha frame so the popa below loads
  ; them straight into dx and ax.
  fistp word[bx-10] ; pushed_dx = s16(h.x)
  fstp st0          ; discard t*rd.y
  fadd dword[si];=[RO+8 + si-100h]   ; + ro.z (the float overlaid on PD)
  fiadd word[bx-8]  ; T (pushed_cx)
  fistp word[bx-6]  ; pushed_ax = s16(h.z + T)   (the XOR with dx happens after popa)

  popa              ; di = pixel offset, cx = T, dx = h.x, ax = h.z+T
  jb  S             ; t<0: take the fixed colour 0F0h
  xor ax,dx
  shr ax,4          ; CF = bit 3 of (h.x XOR (h.z+T))
  salc              ; undocumented opcode: al = 0FFh if CF else 00h
  ; 0A9h is the opcode of "test ax,imm16"; it swallows the two bytes of the
  ; following "mov al,0xF0" as its immediate, so al keeps the salc value.
  db 0xA9 ; skip 2 bytes
S:mov al,0xF0
  stosb             ; write the pixel at es:di, di++
  pop dx            ; balance the "push dx" at the top of the pixel loop
  or  di,di
  jnz X             ; loop until di wraps to 0 (65536 bytes per frame)
  inc cx  ; T++
  in al,60h         ; read the keyboard controller data port
  cmp al,1          ; scancode 1 (Esc pressed) ends the program
  jnz M
  ; No explicit ret here: execution falls through into LOAD_SCALE and leaves
  ; through its ret, which pops the word at sp (-2 per the start-up assumptions).
 ;ret     ; fallthrough

;LOAD (plain copy of the vector): fld1, then call LOAD_SCALE
;-----------------------------------------------------------------------
; LOAD_SCALE -- push a scaled copy of a 3-float vector onto the FPU stack
; In:    st0 = k (scale factor)
;        vector at [bp+si] (x), [bp+di] (y), [bp+si+8] (z);
;        the callers keep di = si+4
; Out:   st0 = k*x, st1 = k*y, st2 = k*z   (k itself is consumed)
; Clobb: FPU stack only; CPU registers and flags are untouched
; Also the program's exit path: the main loop falls through into this
; routine and leaves via its ret.
;-----------------------------------------------------------------------
LOAD_SCALE:
        fld     dword [bp+di]           ;|y  k
        fmul    st0, st1                ;|ky k
        fld     dword [bp+si+8]         ;|z  ky k
        fmul    st0, st2                ;|kz ky k
        fxch    st2                     ;|k  ky kz
        fmul    dword [bp+si]           ;|kx ky kz
        ret

;STORE (store only, no dot product wanted): call STORE_DOT, then fstp st0
;-----------------------------------------------------------------------
; STORE_DOT -- pop three floats into the vector at bp, then fall into DOT
; In:    st0 = x, st1 = y, st2 = z
;        destination at [bp+si] (x), [bp+di] (y), [bp+si+8] (z)
;        second operand for the dot product at bx (see DOT)
; Out:   vector stored, the three inputs popped, st0 = stored . [bx]
;-----------------------------------------------------------------------
STORE_DOT:
        fstp    dword [bp+si]           ; x
        fstp    dword [bp+di]           ; y
        fstp    dword [bp+si+8]         ; z

;-----------------------------------------------------------------------
; DOT -- dot product of two 3-float vectors
; In:    a at [bp+si], [bp+di], [bp+si+8]
;        b at [bx+si], [bx+di], [bx+si+8]
; Out:   st0 = a.x*b.x + a.y*b.y + a.z*b.z  (one value pushed)
; Clobb: FPU stack only
;-----------------------------------------------------------------------
DOT:
        fld     dword [bp+si]
        fmul    dword [bx+si]           ;|ax*bx
        fld     dword [bp+di]
        fmul    dword [bx+di]           ;|ay*by ax*bx
        faddp   st1                     ;|ax*bx+ay*by
        fld     dword [bp+si+8]
        fmul    dword [bx+si+8]         ;|az*bz ax*bx+ay*by
        faddp   st1                     ;|a.b
        ret
